$t_{min} = 2007 \, ,\quad t_{max} = 2017$
Input Data
begin = "2007-01-01"
finish = "2017-01-01"
N=20
#data_loader(begin, finish, 'returns.csv')
returns = get_returns('returns.csv', N)
delete_nans('returns.csv', 'returns.csv')
data_head('returns.csv', N)
Number of NaNs: 0 shape Dataframe: (2518, 20)
| MMM | ABT | ABBV | ABMD | ACN | ATVI | ADBE | AMD | AAP | AES | AMG | AFL | A | APD | AKAM | ALK | ALB | ARE | ALXN | ALGN | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | ||||||||||||||||||||
| 2007-01-03 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 2007-01-04 | -0.003961 | 0.018991 | 0.0 | 0.021352 | 0.022540 | 0.008158 | 0.022545 | 0.013832 | 0.006464 | 0.000456 | -0.010861 | -0.011027 | 0.003207 | -0.021972 | 0.023622 | 0.019508 | -0.004584 | -0.001686 | 0.002955 | 0.018727 |
| 2007-01-05 | -0.006799 | 0.000000 | 0.0 | -0.014634 | -0.013979 | -0.005780 | -0.004900 | -0.004043 | -0.022061 | -0.028702 | -0.008021 | 0.001312 | -0.009299 | -0.013934 | -0.002064 | -0.036116 | -0.017446 | -0.019664 | -0.006629 | 0.002941 |
| 2007-01-08 | 0.002195 | 0.003607 | 0.0 | -0.002122 | 0.014449 | -0.000581 | -0.004185 | -0.012177 | 0.003427 | -0.014071 | 0.013187 | 0.000655 | -0.003520 | -0.001298 | -0.003384 | 0.021092 | -0.005114 | -0.009523 | -0.027435 | 0.002933 |
| 2007-01-09 | 0.001160 | 0.008786 | 0.0 | -0.010631 | -0.002419 | -0.000582 | -0.020272 | 0.009245 | 0.008537 | -0.040914 | 0.009690 | 0.003709 | 0.001178 | -0.002166 | -0.006414 | 0.008263 | 0.005854 | 0.024956 | -0.007624 | 0.001462 |
plot_example_returns1('returns.csv', N)
def get_data_subsets(df, dur, variant, onehot):
"""
variant options: month, vola, next_day
"""
branches_ = pd.read_csv('snp500info.csv', index_col=0)
branches = branches_.drop(columns = ['Security', 'GICS Sub Industry', 'start at yahoo'])
companies = list(df)
data_onehot = branches.loc[branches.index.isin(companies)].iloc[:,0]
# integer encode
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(data_onehot)
# binary encode
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded).T
df = np.array(df)
month = 30
leng = df.shape[0]
x=[]
y=[]
for i in range(leng):
if dur+i>=leng:
break
if onehot:
x_inp = np.concatenate((onehot_encoded, df[i:dur+i,:]))
else:
x_inp = df[i:dur+i,:]
if variant == 'vola':
x.append(np.abs(x_inp))
y.append(np.abs(df[dur+i,:]))
else:
x.append(x_inp)
if variant == 'month':
y.append(np.mean(df[dur+i:dur+i+month,:], axis = 0))
elif variant == 'next_day':
y.append(df[dur+i,:])
else:
raise NameError("Use correct fun_label")
return np.array(x),np.array(y)
pd.read_csv('snp500info.csv', index_col=0).head()
branches_ = pd.read_csv('snp500info.csv', index_col=0)
branches = branches_.drop(columns = ['Security', 'GICS Sub Industry', 'start at yahoo'])
branches.head()
| GICS Sector | |
|---|---|
| Ticker symbol | |
| MMM | Industrials |
| ABT | Health Care |
| ABBV | Health Care |
| ACN | Information Technology |
| ATVI | Information Technology |
data = branches.iloc[:,0]
# integer encode
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(data)
print(integer_encoded)
[ 5 4 4 6 6 5 6 6 0 10 4 3 3 4 7 6 5 7 8 4 4 5 4 6 10 3 6 6 1 0 10 5 10 3 3 8 10 3 4 5 4 6 2 6 2 6 4 3 5 2 8 6 6 1 5 3 3 9 6 6 0 8 7 2 7 3 3 4 4 3 4 3 0 4 3 3 5 0 8 4 3 4 6 1 5 6 2 6 1 3 4 3 0 0 5 8 0 4 4 10 9 4 7 3 0 2 2 0 3 1 4 2 3 5 6 3 3 6 1 3 10 1 6 1 0 3 1 2 2 10 1 4 6 1 1 8 6 5 5 1 0 4 0 4 5 0 5 4 2 8 3 0 0 0 0 0 10 5 7 1 10 8 10 6 3 7 5 6 7 10 4 6 5 10 4 2 2 5 8 8 8 1 10 3 10 0 5 4 8 2 6 6 5 8 5 6 3 10 6 6 5 5 7 0 0 5 5 3 7 0 0 6 5 5 8 1 0 0 4 6 3 0 5 2 0 0 6 3 0 4 8 2 4 1 2 6 0 4 0 5 1 8 6 4 3 4 5 5 4 5 6 3 6 4 7 0 7 6 4 3 4 8 5 5 1 4 5 3 6 5 1 3 1 8 2 6 0 1 1 0 5 4 6 0 0 3 4 3 0 5 3 0 7 3 8 0 2 2 0 3 7 5 6 0 1 0 4 4 4 3 4 0 0 6 6 6 8 0 1 1 7 1 3 3 7 6 4 3 2 3 6 6 0 2 7 0 0 10 5 0 10 2 0 5 3 5 0 10 7 6 0 2 0 2 6 5 7 5 4 6 6 5 3 1 4 4 4 10 1 2 10 2 3 0 7 10 7 0 3 1 3 8 3 10 8 0 0 6 5 6 4 2 3 5 8 6 8 4 3 5 4 5 5 5 5 0 0 6 8 10 2 0 6 7 10 7 0 8 6 8 0 10 5 3 0 0 3 5 4 3 6 3 6 1 3 0 0 6 2 6 5 4 0 0 0 3 6 0 5 3 0 0 0 1 8 0 3 0 0 5 5 4 5 5 5 4 3 0 2 4 8 6 5 9 4 0 6 8 7 1 1 0 5 4 10 3 8 6 6 7 8 0 2 3 0 0 10 6 6 3 5 0 4 3 4]
# binary encode
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
df = pd.DataFrame(onehot_encoded)
df.head()
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 |
Evaluation Types:
def directional_goodness(model,x_test,y_test):
"""
directional goodnes gives the factor of correctly predicted signs of first order derivative of returns to false ones
oder auch:
gibt die Anzahl der Beobachtungen an, deren Vorhersage das richtige Vorzeichen hatte (Kurs steigt, Kurs fƤllt)
"""
try:
y_pred = model.predict(x_test)
except AttributeError:
y_pred = model(x_test)
count = 0
#print(y_new.shape)
for i in range(len(y_pred)):
for j in range(len(y_pred[i])):
p= y_pred[i,j] > 0
n = y_test[i,j] > 0
if n==p:
count +=1
print('percentage of correctly predicted directions of returns: ' + str(count/len(y_pred)/len(y_pred[0])))
def largest(array, amount):
return array.argsort()[:,-amount:]
def compare_top(model, x_test, y_test):
try:
y_pred = model.predict(x_test)
except AttributeError:
y_pred = model(x_test)
amount = 5
mean_all = np.mean(y_test)
top_index = largest(y_pred,amount)
mean_mach = np.mean(y_test[top_index])
ret = mean_mach-mean_all
print('Difference between mean return and return of calculated top 5: ' + str(ret))
return ret
def internal_goodness(model, x_new, y_new):
print('compare true to predicted values of internal validation on learned dataset: ')
y_pred = model.predict(x_new)
#model.predict nimmt x-werte und gibt die predicteten y-werte zurück
plt.scatter(y_new, y_pred)
def external_goodness(model, x_new, y_new):
print('compare true to predicted values of external validation set: ')
y_pred = model.predict(x_new)
color = ["r", "b", "g"]
for i in range(3):
plt.plot(y_new[:20,i],c=color[i], label = 'true')
plt.plot(y_pred[:20,i], c=color[i], linestyle='--', label = 'pred')
plt.legend()
# Mean über alle returns, mean über ausgewählte Daten
print(np.mean(returns))
print("overall mean = " + str(np.mean(np.mean(returns))))
T 0.000390 AES 0.000102 WMT 0.000318 OMC 0.000423 CTL 0.000181 HRL 0.000814 HCA 0.000562 WEC 0.000604 NWL 0.000511 MTB 0.000491 GOOGL 0.000646 SYF 0.000217 DE 0.000659 MHK 0.000679 AIV 0.000554 ATVI 0.000848 PGR 0.000450 FMC 0.000782 ADI 0.000600 LMT 0.000635 dtype: float64 overall mean = 0.00052331781897321
def mov_avg(x_train):
print(np.array(x_train).shape)
return np.sum(x_train, axis = 1)/x_train.shape[1]
# directional_goodness(mov_avg, x_test, y_test)
# compare_top(mov_avg, x_test, y_test)
# plots(mov_avg, x_test=x_test, x_train=x_train, y_test=y_test, y_train=y_train)
def cnn(x_train, y_train, x_test, y_test, inp_shape, epochs):
verbose, batch_size = 0, 50
epochs = epochs
"""
verbose: 0-kein output, 1-ladebalken, 2-epochenzahlen printen
batch_size: Nicht definieren (https://stackoverflow.com/questions/44747343/keras-input-explanation-input-shape-units-batch-size-dim-etc)
epochs: Anzahl Iterationen durch das Trainingsset
"""
N = inp_shape[1]
#init
model = models.Sequential()
model.add(layers.Conv1D(filters=64, kernel_size=3, activation='relu',
input_shape=inp_shape))
model.add(layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
#Conv1D => 1D Convolution (Schaut nach Mustern)
#mit filters = Anzahl Weight Functions, kernel_size = Anzahl simultan betrachteter Felder,
#relu = 0 für value<0 sonst linear
model.add(layers.Dropout(0.3))
#Dropout sets randomly chosen values to 0 to prevent overfitting
model.add(layers.MaxPooling1D(pool_size=2))
#MaxPooling halbiert array GrƶĆe und nimmt grƶĆte Werte der Feature-Gewichtungen
model.add(layers.Flatten())
#Flatten reduziert dimensionen eines arrays auf niedrigst mögliche Dimension (1xdim) (überschreibt Nullen))
model.add(layers.Dense(100, activation='relu'))
model.add(layers.Dense(N, activation='linear'))
#Klassisches NN hinter Convolutional Layer geschaltet, lernt also im Feature Raum, durch Convolutional Net vorgebenen
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse', 'mae'])
#mean_squared_error (mse) and mean_absolute_error (mae)
#fit network and write in history
history = model.fit(x_train,y_train, epochs=epochs, #batch_size=batch_size,
verbose=verbose)
#evaluate model
#Print error values for classification of goodness
mse,mse2,mae = model.evaluate(x_test,y_test, batch_size=batch_size, verbose=verbose)
print(mse)
print(mse2)
print(mae)
return model,history
def main_cnn(input_df, dur, limit, variant, epochs):
"""
input_df die Input Matrix (duh)
dur Zeitfenster
limit Grenze für Train Test Split
variant prediction of next day, next month average, volatility
"""
N = input_df.shape[1]
D = input_df.shape[0]
train = input_df.iloc[:limit,:]
test = input_df.iloc[limit-dur:,:]
x_train,y_train = get_data_subsets(train, dur, variant="next_day", onehot=0)
x_test,y_test = get_data_subsets(test, dur, variant="next_day", onehot=0)
inp_shape = (dur,N)
x_train = x_train[:,x_train.shape[1]-dur:,:]
x_test = x_test[:,x_test.shape[1]-dur:,:]
"""für Dimensions-tests"""
print('x-train shape: ' + str(x_train.shape))
print('y-train shape: ' + str(y_train.shape))
print('x-test shape: ' + str(x_test.shape))
print('y-test shape: ' + str(y_test.shape))
print('test shape: ' + str(test.shape))
model,history = cnn(x_train, y_train, x_test, y_test, inp_shape, epochs)
return model,history,x_train,y_train,x_test,y_test
model_cnn,history_cnn,x_train,y_train,x_test,y_test = main_cnn(returns, 100, 2000, variant='next_day', epochs=200)
print(model_cnn.summary())
x-train shape: (1900, 100, 20) y-train shape: (1900, 20) x-test shape: (518, 100, 20) y-test shape: (518, 20) test shape: (618, 20) WARNING:tensorflow:From /home/jesse/master_thesis/bayes-master/IntroMLDL-master/env2/env2/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4070: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead. WARNING:tensorflow:From /home/jesse/master_thesis/bayes-master/IntroMLDL-master/env2/env2/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:422: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead. 0.0002557276840264186 0.0002557276748120785 0.011499723419547081 Model: "sequential_1" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv1d_1 (Conv1D) (None, 98, 64) 3904 _________________________________________________________________ conv1d_2 (Conv1D) (None, 96, 64) 12352 _________________________________________________________________ dropout_1 (Dropout) (None, 96, 64) 0 _________________________________________________________________ max_pooling1d_1 (MaxPooling1 (None, 48, 64) 0 _________________________________________________________________ flatten_1 (Flatten) (None, 3072) 0 _________________________________________________________________ dense_1 (Dense) (None, 100) 307300 _________________________________________________________________ dense_2 (Dense) (None, 20) 2020 ================================================================= Total params: 325,576 Trainable params: 325,576 Non-trainable params: 0 _________________________________________________________________ None
directional_goodness(model_cnn, x_test, y_test)
compare_top(model_cnn, x_test, y_test)
plots(model_cnn, history_cnn, x_test=x_test, x_train=x_train, y_test=y_test, y_train=y_train)
percentage of correctly predicted directions of returns: 0.49604247104247107 Difference between mean return and return of calculated top 5: -0.0011790465671789913
model_cnn_m,history_cnn_m,x_train_m,y_train_m,x_test_m,y_test_m = main_cnn(returns, 100, 2000, variant='month', epochs=200)
print(model_cnn_m.summary())
x-train shape: (1900, 100, 20) y-train shape: (1900, 20) x-test shape: (518, 100, 20) y-test shape: (518, 20) test shape: (618, 20) 0.0002507496839586976 0.00025074969744309783 0.011325869709253311 Model: "sequential_2" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv1d_3 (Conv1D) (None, 98, 64) 3904 _________________________________________________________________ conv1d_4 (Conv1D) (None, 96, 64) 12352 _________________________________________________________________ dropout_2 (Dropout) (None, 96, 64) 0 _________________________________________________________________ max_pooling1d_2 (MaxPooling1 (None, 48, 64) 0 _________________________________________________________________ flatten_2 (Flatten) (None, 3072) 0 _________________________________________________________________ dense_3 (Dense) (None, 100) 307300 _________________________________________________________________ dense_4 (Dense) (None, 20) 2020 ================================================================= Total params: 325,576 Trainable params: 325,576 Non-trainable params: 0 _________________________________________________________________ None
directional_goodness(model_cnn_m, x_test_m, y_test_m)
compare_top(model_cnn_m, x_test_m, y_test_m)
plots(model_cnn_m, history_cnn_m, x_test=x_test_m, x_train=x_train_m, y_test=y_test_m, y_train=y_train_m)
percentage of correctly predicted directions of returns: 0.4992277992277992 Difference between mean return and return of calculated top 5: 0.0013224183490890278
model_cnn_v,history_cnn_v,x_train_v,y_train_v,x_test_v,y_test_v = main_cnn(returns, 100, 2000, variant='vola', epochs=200)
print(model_cnn_v.summary())
x-train shape: (1900, 100, 20) y-train shape: (1900, 20) x-test shape: (518, 100, 20) y-test shape: (518, 20) test shape: (618, 20) 0.00026065440185261984 0.00026065442943945527 0.011642149649560452 Model: "sequential_3" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv1d_5 (Conv1D) (None, 98, 64) 3904 _________________________________________________________________ conv1d_6 (Conv1D) (None, 96, 64) 12352 _________________________________________________________________ dropout_3 (Dropout) (None, 96, 64) 0 _________________________________________________________________ max_pooling1d_3 (MaxPooling1 (None, 48, 64) 0 _________________________________________________________________ flatten_3 (Flatten) (None, 3072) 0 _________________________________________________________________ dense_5 (Dense) (None, 100) 307300 _________________________________________________________________ dense_6 (Dense) (None, 20) 2020 ================================================================= Total params: 325,576 Trainable params: 325,576 Non-trainable params: 0 _________________________________________________________________ None
directional_goodness(model_cnn_v, x_test_v, y_test_v)
compare_top(model_cnn_v, x_test_v, y_test_v)
plots(model_cnn,history_cnn_v, x_test=x_test_v, x_train=x_train_v, y_test=y_test_v, y_train=y_train_v)
percentage of correctly predicted directions of returns: 0.49903474903474904 Difference between mean return and return of calculated top 5: 0.001162603964078791
Expectations:
Results:
def cnn_2inputs(xtrain,ytrain,xtest,ytest,inp_shape1,inp_shape2,epochs):
verbose, batch_size = 0, 50
epochs = epochs
"""
verbose: 0-kein output, 1-ladebalken, 2-epochenzahlen printen
batch_size: Nicht definieren (https://stackoverflow.com/questions/44747343/keras-input-explanation-input-shape-units-batch-size-dim-etc)
epochs: Anzahl Iterationen durch das Trainingsset
"""
i1 = layers.Input(inp_shape1)
i2 = layers.Input(inp_shape2)
N = inp_shape1[1]
#init
model1 = layers.Conv1D(filters=64, kernel_size=3, activation='relu')(i1)
model1 = layers.Conv1D(filters=64, kernel_size=3, activation='relu')(model1)
#Conv1D => 1D Convolution (Schaut nach Mustern)
#mit filters = Anzahl Weight Functions, kernel_size = Anzahl simultan betrachteter Felder,
#relu = 0 für value<0 sonst linear
model1 = layers.Dropout(0.5)(model1)
#Dropout sets randomly chosen values to 0 to prevent overfitting
model1 = layers.MaxPooling1D(pool_size=2)(model1)
#MaxPooling halbiert array GrƶĆe und nimmt grƶĆte Werte der Feature-Gewichtungen
model1 = layers.Flatten()(model1)
#Flatten reduziert dimensionen eines arrays auf niedrigst mögliche Dimension (1xdim) (überschreibt Nullen))
model1 = models.Model(inputs = i1, outputs = model1)
### second network:
model2 = layers.Flatten()(i2)
model2 = layers.Dense(100, activation="relu")(model2)
#model2 = layers.Dense(100, activation="relu")(model2)
model2 = models.Model(inputs = i2, outputs = model2)
combined = layers.concatenate([model1.output, model2.output])
model = layers.Dense(100, activation='relu')(combined)
model = layers.Dense(N, activation='linear')(model)
model = models.Model(inputs = [model1.input, model2.input], outputs = model)
#Klassisches NN hinter Convolutional Layer geschaltet, lernt also im Feature Raum, durch Convolutional Net vorgebenen
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse', 'mae'])
#mean_squared_error (mse) and mean_absolute_error (mae)
#fit network
history = model.fit(xtrain,ytrain, epochs=epochs, #batch_size=batch_size,
verbose=verbose)
#evaluate model
#Print error values for classification of goodness
mse,mse2,mae = model.evaluate(xtest,ytest, batch_size=batch_size, verbose=verbose)
print(mse)
print(mse2)
print(mae)
return model,history
def main_cnn_2inputs(input_df, dur, limit, variant, epochs):
"""
input_df die Input Matrix (duh)
dur Zeitfenster
limit Grenze für Train Test Split
variant prediction of next day, next month average, volatility
"""
N = input_df.shape[1]
D = input_df.shape[0]
train = input_df.iloc[:limit,:]
test = input_df.iloc[limit-dur:,:]
x_train,y_train = get_data_subsets(train, dur, variant, onehot=1)
x_test,y_test = get_data_subsets(test, dur, variant, onehot=1)
inp_shape1 = (dur,N)
inp_shape2 = (x_train.shape[1]-dur,N)
x_train = [x_train[:,inp_shape2[0]:,:],x_train[:,:inp_shape2[0],:]]
x_test = [x_test[:,inp_shape2[0]:,:],x_test[:,:inp_shape2[0],:]]
"""für Dimensions-tests"""
print('x-train shape: ' + str(x_train[0].shape)+ str(x_train[1].shape))
print('y-train shape: ' + str(y_train.shape))
print('x-test shape: ' + str(x_test[0].shape)+ str(x_test[1].shape))
print('y-test shape: ' + str(y_test.shape))
print('test shape: ' + str(test.shape))
model,history = cnn_2inputs(x_train, y_train, x_test, y_test, inp_shape1, inp_shape2, epochs)
return model,history,x_train,y_train,x_test,y_test
model_cnn_2inputs,history_cnn_2inputs,x_train,y_train,x_test,y_test = main_cnn_2inputs(returns, 100, 2000, variant='next_day', epochs=200)
print(model_cnn_2inputs.summary())
x-train shape: (1900, 100, 20)(1900, 10, 20)
y-train shape: (1900, 20)
x-test shape: (518, 100, 20)(518, 10, 20)
y-test shape: (518, 20)
test shape: (618, 20)
0.00025256416541488274
0.00025256414664909244
0.011385714635252953
Model: "model_3"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) (None, 100, 20) 0
__________________________________________________________________________________________________
conv1d_7 (Conv1D) (None, 98, 64) 3904 input_1[0][0]
__________________________________________________________________________________________________
conv1d_8 (Conv1D) (None, 96, 64) 12352 conv1d_7[0][0]
__________________________________________________________________________________________________
dropout_4 (Dropout) (None, 96, 64) 0 conv1d_8[0][0]
__________________________________________________________________________________________________
input_2 (InputLayer) (None, 10, 20) 0
__________________________________________________________________________________________________
max_pooling1d_4 (MaxPooling1D) (None, 48, 64) 0 dropout_4[0][0]
__________________________________________________________________________________________________
flatten_5 (Flatten) (None, 200) 0 input_2[0][0]
__________________________________________________________________________________________________
flatten_4 (Flatten) (None, 3072) 0 max_pooling1d_4[0][0]
__________________________________________________________________________________________________
dense_7 (Dense) (None, 100) 20100 flatten_5[0][0]
__________________________________________________________________________________________________
concatenate_1 (Concatenate) (None, 3172) 0 flatten_4[0][0]
dense_7[0][0]
__________________________________________________________________________________________________
dense_8 (Dense) (None, 100) 317300 concatenate_1[0][0]
__________________________________________________________________________________________________
dense_9 (Dense) (None, 20) 2020 dense_8[0][0]
==================================================================================================
Total params: 355,676
Trainable params: 355,676
Non-trainable params: 0
__________________________________________________________________________________________________
None
directional_goodness(model_cnn_2inputs, x_test, y_test)
compare_top(model_cnn_2inputs, x_test, y_test)
plots(model_cnn_2inputs, history_cnn_2inputs, x_test=x_test, x_train=x_train, y_test=y_test, y_train=y_train)
percentage of correctly predicted directions of returns: 0.4928571428571429 Difference between mean return and return of calculated top 5: -0.0005640597994432985
model_cnn_2inputs_m,history_cnn_2inputs_m,x_train_m,y_train_m,x_test_m,y_test_m = main_cnn_2inputs(returns, 100, 2000, variant='month', epochs=200)
print(model_cnn_2inputs_m.summary())
x-train shape: (1900, 100, 20)(1900, 10, 20)
y-train shape: (1900, 20)
x-test shape: (518, 100, 20)(518, 10, 20)
y-test shape: (518, 20)
test shape: (618, 20)
7.81263266568154e-06
7.812632247805595e-06
0.0021511674858629704
Model: "model_6"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_3 (InputLayer) (None, 100, 20) 0
__________________________________________________________________________________________________
conv1d_9 (Conv1D) (None, 98, 64) 3904 input_3[0][0]
__________________________________________________________________________________________________
conv1d_10 (Conv1D) (None, 96, 64) 12352 conv1d_9[0][0]
__________________________________________________________________________________________________
dropout_5 (Dropout) (None, 96, 64) 0 conv1d_10[0][0]
__________________________________________________________________________________________________
input_4 (InputLayer) (None, 10, 20) 0
__________________________________________________________________________________________________
max_pooling1d_5 (MaxPooling1D) (None, 48, 64) 0 dropout_5[0][0]
__________________________________________________________________________________________________
flatten_7 (Flatten) (None, 200) 0 input_4[0][0]
__________________________________________________________________________________________________
flatten_6 (Flatten) (None, 3072) 0 max_pooling1d_5[0][0]
__________________________________________________________________________________________________
dense_10 (Dense) (None, 100) 20100 flatten_7[0][0]
__________________________________________________________________________________________________
concatenate_2 (Concatenate) (None, 3172) 0 flatten_6[0][0]
dense_10[0][0]
__________________________________________________________________________________________________
dense_11 (Dense) (None, 100) 317300 concatenate_2[0][0]
__________________________________________________________________________________________________
dense_12 (Dense) (None, 20) 2020 dense_11[0][0]
==================================================================================================
Total params: 355,676
Trainable params: 355,676
Non-trainable params: 0
__________________________________________________________________________________________________
None
directional_goodness(model_cnn_2inputs_m, x_test_m, y_test_m)
compare_top(model_cnn_2inputs_m, x_test_m, y_test_m)
plots(model_cnn_2inputs_m, history_cnn_2inputs_m, x_test=x_test_m, x_train=x_train_m, y_test=y_test_m, y_train=y_train_m)
percentage of correctly predicted directions of returns: 0.5517374517374518 Difference between mean return and return of calculated top 5: 0.00032141328067269775
model_cnn_2inputs_v,history_cnn_2inputs_v,x_train_v,y_train_v,x_test_v,y_test_v = main_cnn_2inputs(returns, 100, 2000, variant='vola', epochs=200)
print(model_cnn_2inputs_v.summary())
x-train shape: (1900, 100, 20)(1900, 10, 20)
y-train shape: (1900, 20)
x-test shape: (518, 100, 20)(518, 10, 20)
y-test shape: (518, 20)
test shape: (618, 20)
0.00013362709165961592
0.00013362709432840347
0.007579164579510689
Model: "model_9"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_5 (InputLayer) (None, 100, 20) 0
__________________________________________________________________________________________________
conv1d_11 (Conv1D) (None, 98, 64) 3904 input_5[0][0]
__________________________________________________________________________________________________
conv1d_12 (Conv1D) (None, 96, 64) 12352 conv1d_11[0][0]
__________________________________________________________________________________________________
dropout_6 (Dropout) (None, 96, 64) 0 conv1d_12[0][0]
__________________________________________________________________________________________________
input_6 (InputLayer) (None, 10, 20) 0
__________________________________________________________________________________________________
max_pooling1d_6 (MaxPooling1D) (None, 48, 64) 0 dropout_6[0][0]
__________________________________________________________________________________________________
flatten_9 (Flatten) (None, 200) 0 input_6[0][0]
__________________________________________________________________________________________________
flatten_8 (Flatten) (None, 3072) 0 max_pooling1d_6[0][0]
__________________________________________________________________________________________________
dense_13 (Dense) (None, 100) 20100 flatten_9[0][0]
__________________________________________________________________________________________________
concatenate_3 (Concatenate) (None, 3172) 0 flatten_8[0][0]
dense_13[0][0]
__________________________________________________________________________________________________
dense_14 (Dense) (None, 100) 317300 concatenate_3[0][0]
__________________________________________________________________________________________________
dense_15 (Dense) (None, 20) 2020 dense_14[0][0]
==================================================================================================
Total params: 355,676
Trainable params: 355,676
Non-trainable params: 0
__________________________________________________________________________________________________
None
directional_goodness(model_cnn_2inputs_v, x_test_v, y_test_v)
compare_top(model_cnn_2inputs_v, x_test_v, y_test_v)
plots(model_cnn_2inputs_v, history_cnn_2inputs_v, x_test=x_test_v, x_train=x_train_v, y_test=y_test_v, y_train=y_train_v)
percentage of correctly predicted directions of returns: 0.9901544401544401 Difference between mean return and return of calculated top 5: -0.001558351235556702
Expectations:
Results:
Conclusion for CNNs:
model_fully,history_fully,x_train,y_train,x_test,y_test = main_fully(returns, 100, 2000, variant='next_day', epochs=200)
print(model_fully.summary())
now x-train shape: (1900, 100, 20) y-train shape: (1900, 20) x-test shape: (518, 100, 20) y-test shape: (518, 20) test shape: (618, 20) 0.0002936646605785181 0.00029366466333158314 0.012422925792634487 Model: "sequential_4" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense_16 (Dense) (None, 100, 20) 420 _________________________________________________________________ dense_17 (Dense) (None, 100, 100) 2100 _________________________________________________________________ dropout_7 (Dropout) (None, 100, 100) 0 _________________________________________________________________ dense_18 (Dense) (None, 100, 100) 10100 _________________________________________________________________ flatten_10 (Flatten) (None, 10000) 0 _________________________________________________________________ dense_19 (Dense) (None, 100) 1000100 _________________________________________________________________ dense_20 (Dense) (None, 20) 2020 ================================================================= Total params: 1,014,740 Trainable params: 1,014,740 Non-trainable params: 0 _________________________________________________________________ None
directional_goodness(model_fully, x_test, y_test)
compare_top(model_fully, x_test, y_test)
plots(model_fully, history_fully, x_test=x_test, x_train=x_train, y_test=y_test, y_train=y_train)
percentage of correctly predicted directions of returns: 0.4904440154440154 Difference between mean return and return of calculated top 5: 0.0005426322476837399
model_fully_m, history_fully_m, x_train_m,y_train_m,x_test_m,y_test_m = main_fully(returns, 100, 2000, variant='month', epochs=200)
print(model_fully_m.summary())
now x-train shape: (1900, 100, 20) y-train shape: (1900, 20) x-test shape: (518, 100, 20) y-test shape: (518, 20) test shape: (618, 20) 7.1996492090288675e-06 7.199648734967923e-06 0.0020590219646692276 Model: "sequential_5" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense_21 (Dense) (None, 100, 20) 420 _________________________________________________________________ dense_22 (Dense) (None, 100, 100) 2100 _________________________________________________________________ dropout_8 (Dropout) (None, 100, 100) 0 _________________________________________________________________ dense_23 (Dense) (None, 100, 100) 10100 _________________________________________________________________ flatten_11 (Flatten) (None, 10000) 0 _________________________________________________________________ dense_24 (Dense) (None, 100) 1000100 _________________________________________________________________ dense_25 (Dense) (None, 20) 2020 ================================================================= Total params: 1,014,740 Trainable params: 1,014,740 Non-trainable params: 0 _________________________________________________________________ None
directional_goodness(model_fully_m, x_test_m, y_test_m)
compare_top(model_fully_m, x_test_m, y_test_m)
plots(model_fully_m, history_fully_m, x_test=x_test_m, x_train=x_train_m, y_test=y_test_m, y_train=y_train_m)
percentage of correctly predicted directions of returns: 0.5353281853281853 Difference between mean return and return of calculated top 5: 0.00020139641650024495
model_fully_v,history_fully_v,x_train_v,y_train_v,x_test_v,y_test_v = main_fully(returns, 100, 2000, variant='vola', epochs=200)
print(model_fully_v.summary())
now x-train shape: (1900, 100, 20) y-train shape: (1900, 20) x-test shape: (518, 100, 20) y-test shape: (518, 20) test shape: (618, 20) 0.00014137014877071342 0.00014137015386950225 0.00792038906365633 Model: "sequential_6" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense_26 (Dense) (None, 100, 20) 420 _________________________________________________________________ dense_27 (Dense) (None, 100, 100) 2100 _________________________________________________________________ dropout_9 (Dropout) (None, 100, 100) 0 _________________________________________________________________ dense_28 (Dense) (None, 100, 100) 10100 _________________________________________________________________ flatten_12 (Flatten) (None, 10000) 0 _________________________________________________________________ dense_29 (Dense) (None, 100) 1000100 _________________________________________________________________ dense_30 (Dense) (None, 20) 2020 ================================================================= Total params: 1,014,740 Trainable params: 1,014,740 Non-trainable params: 0 _________________________________________________________________ None
directional_goodness(model_fully_v, x_test_v, y_test_v)
compare_top(model_fully_v, x_test_v, y_test_v)
plots(model_fully_v,history_fully_v, x_test=x_test_v, x_train=x_train_v, y_test=y_test_v, y_train=y_train_v)
percentage of correctly predicted directions of returns: 0.95 Difference between mean return and return of calculated top 5: -0.00030305830944946446
Expectations:
Results:
model_gru,history_gru,x_train,y_train,x_test,y_test = main_gru(returns, 100, 2000, variant='next_day', epochs=200)
print(model_gru.summary())
0.0002597120212270558 0.0002597120183054358 0.011326280422508717 Model: "sequential_7" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= gru_1 (GRU) (None, 100, 42) 7938 _________________________________________________________________ dropout_10 (Dropout) (None, 100, 42) 0 _________________________________________________________________ gru_2 (GRU) (None, 42) 10710 _________________________________________________________________ dense_31 (Dense) (None, 100) 4300 _________________________________________________________________ dense_32 (Dense) (None, 20) 2020 ================================================================= Total params: 24,968 Trainable params: 24,968 Non-trainable params: 0 _________________________________________________________________ None
directional_goodness(model_gru, x_test, y_test)
compare_top(model_gru, x_test, y_test)
plots(model_gru, history_gru, x_test=x_test, x_train=x_train, y_test=y_test, y_train=y_train)
percentage of correctly predicted directions of returns: 0.5034749034749034 Difference between mean return and return of calculated top 5: -3.8859279965349706e-05
model_gru_m,history_gru_m,x_train_m,y_train_m,x_test_m,y_test_m = main_gru(returns, 100, 2000, variant='month', epochs=200)
print(model_gru_m.summary())
9.137530189113879e-06 9.137530469160993e-06 0.002307330956682563 Model: "sequential_8" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= gru_3 (GRU) (None, 100, 42) 7938 _________________________________________________________________ dropout_11 (Dropout) (None, 100, 42) 0 _________________________________________________________________ gru_4 (GRU) (None, 42) 10710 _________________________________________________________________ dense_33 (Dense) (None, 100) 4300 _________________________________________________________________ dense_34 (Dense) (None, 20) 2020 ================================================================= Total params: 24,968 Trainable params: 24,968 Non-trainable params: 0 _________________________________________________________________ None
directional_goodness(model_gru_m, x_test_m, y_test_m)
compare_top(model_gru_m, x_test_m, y_test_m)
plots(model_gru_m, history_gru_m, x_test=x_test_m, x_train=x_train_m, y_test=y_test_m, y_train=y_train_m)
percentage of correctly predicted directions of returns: 0.510907335907336 Difference between mean return and return of calculated top 5: 0.00026008230912750595
model_gru_v,history_gru_v,x_train_v,y_train_v,x_test_v,y_test_v = main_gru(returns, 100, 2000, variant='next_day', epochs=200)
print(model_gru_v.summary())
0.0002543732541784982 0.00025437326985411346 0.011250377632677555 Model: "sequential_9" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= gru_5 (GRU) (None, 100, 42) 7938 _________________________________________________________________ dropout_12 (Dropout) (None, 100, 42) 0 _________________________________________________________________ gru_6 (GRU) (None, 42) 10710 _________________________________________________________________ dense_35 (Dense) (None, 100) 4300 _________________________________________________________________ dense_36 (Dense) (None, 20) 2020 ================================================================= Total params: 24,968 Trainable params: 24,968 Non-trainable params: 0 _________________________________________________________________ None
directional_goodness(model_gru_v, x_test, y_test)
compare_top(model_gru_v, x_test_v, y_test_v)
plots(model_gru_v, history_gru_v, x_test=x_test_v, x_train=x_train_v, y_test=y_test_v, y_train=y_train_v)
percentage of correctly predicted directions of returns: 0.5006756756756757 Difference between mean return and return of calculated top 5: 0.0005369729794032647
Expectations:
Results:
model_gru_2inputs,history_gru_2inputs,x_train,y_train,x_test,y_test = main_gru_2inputs(returns, 100, 2000, variant='next_day', epochs=200)
print(model_gru_2inputs.summary())
x-train shape: (1900, 100, 20)(1900, 10, 20)
y-train shape: (1900, 20)
x-test shape: (518, 100, 20)(518, 10, 20)
y-test shape: (518, 20)
test shape: (618, 20)
0.00032339780793694283
0.00032339783501811326
0.012365271337330341
Model: "model_12"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_7 (InputLayer) (None, 100, 20) 0
__________________________________________________________________________________________________
gru_7 (GRU) (None, 100, 100) 36300 input_7[0][0]
__________________________________________________________________________________________________
input_8 (InputLayer) (None, 10, 20) 0
__________________________________________________________________________________________________
dropout_13 (Dropout) (None, 100, 100) 0 gru_7[0][0]
__________________________________________________________________________________________________
flatten_13 (Flatten) (None, 200) 0 input_8[0][0]
__________________________________________________________________________________________________
gru_8 (GRU) (None, 100) 60300 dropout_13[0][0]
__________________________________________________________________________________________________
dense_37 (Dense) (None, 100) 20100 flatten_13[0][0]
__________________________________________________________________________________________________
concatenate_4 (Concatenate) (None, 200) 0 gru_8[0][0]
dense_37[0][0]
__________________________________________________________________________________________________
dense_38 (Dense) (None, 100) 20100 concatenate_4[0][0]
__________________________________________________________________________________________________
dense_39 (Dense) (None, 20) 2020 dense_38[0][0]
==================================================================================================
Total params: 138,820
Trainable params: 138,820
Non-trainable params: 0
__________________________________________________________________________________________________
None
directional_goodness(model_gru_2inputs, x_test, y_test)
compare_top(model_gru_2inputs, x_test, y_test)
plots(model_gru_2inputs, history_gru_2inputs, x_test=x_test, x_train=x_train, y_test=y_test, y_train=y_train)
percentage of correctly predicted directions of returns: 0.48310810810810806 Difference between mean return and return of calculated top 5: 0.0002469440326575646
model_gru_2inputs_m,history_gru_2inputs_m,x_train_m,y_train_m,x_test_m,y_test_m = main_gru_2inputs(returns, 100, 2000, variant='month', epochs=200)
print(model_gru_2inputs_m.summary())
x-train shape: (1900, 100, 20)(1900, 10, 20)
y-train shape: (1900, 20)
x-test shape: (518, 100, 20)(518, 10, 20)
y-test shape: (518, 20)
test shape: (618, 20)
2.8101787077057135e-05
2.8101787393097766e-05
0.003951292019337416
Model: "model_15"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_9 (InputLayer) (None, 100, 20) 0
__________________________________________________________________________________________________
gru_9 (GRU) (None, 100, 100) 36300 input_9[0][0]
__________________________________________________________________________________________________
input_10 (InputLayer) (None, 10, 20) 0
__________________________________________________________________________________________________
dropout_14 (Dropout) (None, 100, 100) 0 gru_9[0][0]
__________________________________________________________________________________________________
flatten_14 (Flatten) (None, 200) 0 input_10[0][0]
__________________________________________________________________________________________________
gru_10 (GRU) (None, 100) 60300 dropout_14[0][0]
__________________________________________________________________________________________________
dense_40 (Dense) (None, 100) 20100 flatten_14[0][0]
__________________________________________________________________________________________________
concatenate_5 (Concatenate) (None, 200) 0 gru_10[0][0]
dense_40[0][0]
__________________________________________________________________________________________________
dense_41 (Dense) (None, 100) 20100 concatenate_5[0][0]
__________________________________________________________________________________________________
dense_42 (Dense) (None, 20) 2020 dense_41[0][0]
==================================================================================================
Total params: 138,820
Trainable params: 138,820
Non-trainable params: 0
__________________________________________________________________________________________________
None
directional_goodness(model_gru_2inputs_m, x_test_m, y_test_m)
compare_top(model_gru_2inputs_m, x_test_m, y_test_m)
plots(model_gru_2inputs_m, history_gru_2inputs_m, x_test=x_test_m, x_train=x_train_m, y_test=y_test_m, y_train=y_train_m)
percentage of correctly predicted directions of returns: 0.5152509652509653 Difference between mean return and return of calculated top 5: 0.0004070091810861406
model_gru_2inputs_v,history_gru_2inputs_v,x_train_v,y_train_v,x_test_v,y_test_v = main_gru_2inputs(returns, 100, 2000, variant='vola', epochs=200)
print(model_gru_2inputs.summary())
x-train shape: (1900, 100, 20)(1900, 10, 20)
y-train shape: (1900, 20)
x-test shape: (518, 100, 20)(518, 10, 20)
y-test shape: (518, 20)
test shape: (618, 20)
0.0001323942668296839
0.00013239427062217146
0.0077064549550414085
Model: "model_12"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_7 (InputLayer) (None, 100, 20) 0
__________________________________________________________________________________________________
gru_7 (GRU) (None, 100, 100) 36300 input_7[0][0]
__________________________________________________________________________________________________
input_8 (InputLayer) (None, 10, 20) 0
__________________________________________________________________________________________________
dropout_13 (Dropout) (None, 100, 100) 0 gru_7[0][0]
__________________________________________________________________________________________________
flatten_13 (Flatten) (None, 200) 0 input_8[0][0]
__________________________________________________________________________________________________
gru_8 (GRU) (None, 100) 60300 dropout_13[0][0]
__________________________________________________________________________________________________
dense_37 (Dense) (None, 100) 20100 flatten_13[0][0]
__________________________________________________________________________________________________
concatenate_4 (Concatenate) (None, 200) 0 gru_8[0][0]
dense_37[0][0]
__________________________________________________________________________________________________
dense_38 (Dense) (None, 100) 20100 concatenate_4[0][0]
__________________________________________________________________________________________________
dense_39 (Dense) (None, 20) 2020 dense_38[0][0]
==================================================================================================
Total params: 138,820
Trainable params: 138,820
Non-trainable params: 0
__________________________________________________________________________________________________
None
directional_goodness(model_gru_2inputs_v, x_test_v, y_test_v)
compare_top(model_gru_2inputs_v, x_test_v, y_test_v)
plots(model_gru_2inputs_v, history_gru_2inputs_v, x_test=x_test_v, x_train=x_train_v, y_test=y_test_v, y_train=y_train_v)
percentage of correctly predicted directions of returns: 0.9893822393822393 Difference between mean return and return of calculated top 5: 0.0006125708075848842
Expectations:
Results:
$n_i$ number of stocks
$n_o$ number of days
model_lstm,hisory_lstm,x_train,y_train,x_test,y_test = main_lstm(returns, 100, 2000, variant='next_day', epochs=200)
print(model_lstm.summary())
x-train shape: (1900, 100, 20) y-train shape: (1900, 20) x-test shape: (518, 100, 20) y-test shape: (518, 20) test shape: (618, 20) 0.0002507366102708424 0.00025073662982322276 0.011051219888031483 Model: "sequential_10" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= lstm_1 (LSTM) (None, 100, 90) 39960 _________________________________________________________________ dropout_16 (Dropout) (None, 100, 90) 0 _________________________________________________________________ lstm_2 (LSTM) (None, 90) 65160 _________________________________________________________________ dense_46 (Dense) (None, 100) 9100 _________________________________________________________________ dense_47 (Dense) (None, 20) 2020 ================================================================= Total params: 116,240 Trainable params: 116,240 Non-trainable params: 0 _________________________________________________________________ None
directional_goodness(model_lstm, x_test, y_test)
compare_top(model_lstm, x_test, y_test)
plots(model_lstm, hisory_lstm,x_test=x_test, x_train=x_train, y_test=y_test, y_train=y_train)
percentage of correctly predicted directions of returns: 0.5128378378378378 Difference between mean return and return of calculated top 5: -0.00018476511169894755
model_lstm_m,hisory_lstm_m,x_train_m,y_train_m,x_test_m,y_test_m = main_lstm(returns, 100, 2000, variant='month', epochs=200)
print(model_lstm_m.summary())
x-train shape: (1900, 100, 20) y-train shape: (1900, 20) x-test shape: (518, 100, 20) y-test shape: (518, 20) test shape: (618, 20) 1.3222612541009756e-05 1.3222612324170768e-05 0.0026961651165038347 Model: "sequential_11" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= lstm_3 (LSTM) (None, 100, 90) 39960 _________________________________________________________________ dropout_17 (Dropout) (None, 100, 90) 0 _________________________________________________________________ lstm_4 (LSTM) (None, 90) 65160 _________________________________________________________________ dense_48 (Dense) (None, 100) 9100 _________________________________________________________________ dense_49 (Dense) (None, 20) 2020 ================================================================= Total params: 116,240 Trainable params: 116,240 Non-trainable params: 0 _________________________________________________________________ None
directional_goodness(model_lstm_m, x_test_m, y_test_m)
compare_top(model_lstm_m, x_test_m, y_test_m)
plots(model_lstm_m, hisory_lstm_m, x_test=x_test_m, x_train=x_train_m, y_test=y_test_m, y_train=y_train_m)
percentage of correctly predicted directions of returns: 0.5201737451737452 Difference between mean return and return of calculated top 5: 0.0003312802386900496
model_lstm_v,hisory_lstm_v,x_train_v,y_train_v,x_test_v,y_test_v = main_lstm(returns, 100, 2000, variant='vola', epochs=200)
print(model_lstm_v.summary())
x-train shape: (1900, 100, 20) y-train shape: (1900, 20) x-test shape: (518, 100, 20) y-test shape: (518, 20) test shape: (618, 20) 0.00012350569296678578 0.00012350569886621088 0.007589756511151791 Model: "sequential_12" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= lstm_5 (LSTM) (None, 100, 90) 39960 _________________________________________________________________ dropout_18 (Dropout) (None, 100, 90) 0 _________________________________________________________________ lstm_6 (LSTM) (None, 90) 65160 _________________________________________________________________ dense_50 (Dense) (None, 100) 9100 _________________________________________________________________ dense_51 (Dense) (None, 20) 2020 ================================================================= Total params: 116,240 Trainable params: 116,240 Non-trainable params: 0 _________________________________________________________________ None
directional_goodness(model_lstm_v, x_test_v, y_test_v)
compare_top(model_lstm_v, x_test_v, y_test_v)
plots(model_lstm_v, hisory_lstm_v,x_test=x_test_v, x_train=x_train_v, y_test=y_test_v, y_train=y_train_v)
percentage of correctly predicted directions of returns: 0.9915057915057914 Difference between mean return and return of calculated top 5: -0.000556972766569382
Expectations:
Results:
model_lstm_2inputs,history_lstm_2inputs,x_train,y_train,x_test,y_test = main_lstm_2inputs(returns, 100, 2000, variant='next_day', epochs=200)
print(model_lstm_2inputs.summary())
x-train shape: (1900, 100, 20)(1900, 10, 20)
y-train shape: (1900, 20)
x-test shape: (518, 100, 20)(518, 10, 20)
y-test shape: (518, 20)
test shape: (618, 20)
0.0003171823962643728
0.00031718239188194275
0.012427020817995071
Model: "model_21"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_13 (InputLayer) (None, 100, 20) 0
__________________________________________________________________________________________________
lstm_7 (LSTM) (None, 100, 90) 39960 input_13[0][0]
__________________________________________________________________________________________________
input_14 (InputLayer) (None, 10, 20) 0
__________________________________________________________________________________________________
dropout_19 (Dropout) (None, 100, 90) 0 lstm_7[0][0]
__________________________________________________________________________________________________
flatten_16 (Flatten) (None, 200) 0 input_14[0][0]
__________________________________________________________________________________________________
lstm_8 (LSTM) (None, 90) 65160 dropout_19[0][0]
__________________________________________________________________________________________________
dense_52 (Dense) (None, 100) 20100 flatten_16[0][0]
__________________________________________________________________________________________________
concatenate_7 (Concatenate) (None, 190) 0 lstm_8[0][0]
dense_52[0][0]
__________________________________________________________________________________________________
dense_53 (Dense) (None, 100) 19100 concatenate_7[0][0]
__________________________________________________________________________________________________
dense_54 (Dense) (None, 20) 2020 dense_53[0][0]
==================================================================================================
Total params: 146,340
Trainable params: 146,340
Non-trainable params: 0
__________________________________________________________________________________________________
None
directional_goodness(model_lstm_2inputs, x_test, y_test)
compare_top(model_lstm_2inputs, x_test, y_test)
plots(model_lstm_2inputs, history_lstm_2inputs, x_test=x_test, x_train=x_train, y_test=y_test, y_train=y_train)
percentage of correctly predicted directions of returns: 0.5001930501930503 Difference between mean return and return of calculated top 5: -0.0004274630955564235
model_lstm_2inputs_m,history_lstm_2inputs_m,x_train_m,y_train_m,x_test_m,y_test_m = main_lstm_2inputs(returns, 100, 2000, variant='next_day', epochs=200)
print(model_lstm_2inputs_m.summary())
x-train shape: (1900, 100, 20)(1900, 10, 20)
y-train shape: (1900, 20)
x-test shape: (518, 100, 20)(518, 10, 20)
y-test shape: (518, 20)
test shape: (618, 20)
0.00029893302366328084
0.0002989330096170306
0.012010184116661549
Model: "model_24"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_15 (InputLayer) (None, 100, 20) 0
__________________________________________________________________________________________________
lstm_9 (LSTM) (None, 100, 90) 39960 input_15[0][0]
__________________________________________________________________________________________________
input_16 (InputLayer) (None, 10, 20) 0
__________________________________________________________________________________________________
dropout_20 (Dropout) (None, 100, 90) 0 lstm_9[0][0]
__________________________________________________________________________________________________
flatten_17 (Flatten) (None, 200) 0 input_16[0][0]
__________________________________________________________________________________________________
lstm_10 (LSTM) (None, 90) 65160 dropout_20[0][0]
__________________________________________________________________________________________________
dense_55 (Dense) (None, 100) 20100 flatten_17[0][0]
__________________________________________________________________________________________________
concatenate_8 (Concatenate) (None, 190) 0 lstm_10[0][0]
dense_55[0][0]
__________________________________________________________________________________________________
dense_56 (Dense) (None, 100) 19100 concatenate_8[0][0]
__________________________________________________________________________________________________
dense_57 (Dense) (None, 20) 2020 dense_56[0][0]
==================================================================================================
Total params: 146,340
Trainable params: 146,340
Non-trainable params: 0
__________________________________________________________________________________________________
None
directional_goodness(model_lstm_2inputs_m, x_test_m, y_test_m)
compare_top(model_lstm_2inputs_m, x_test_m, y_test_m)
plots(model_lstm_2inputs_m, history_lstm_2inputs_m, x_test=x_test_m, x_train=x_train_m, y_test=y_test_m, y_train=y_train_m)
percentage of correctly predicted directions of returns: 0.5105212355212355 Difference between mean return and return of calculated top 5: -0.0029508694687925964
model_lstm_2inputs_v,history_lstm_2inputs_v,x_train_v,y_train_v,x_test_v,y_test_v = main_lstm_2inputs(returns, 100, 2000, variant='next_day', epochs=200)
print(model_lstm_2inputs_v.summary())
x-train shape: (1900, 100, 20)(1900, 10, 20)
y-train shape: (1900, 20)
x-test shape: (518, 100, 20)(518, 10, 20)
y-test shape: (518, 20)
test shape: (618, 20)
0.00035874680333912847
0.00035874679451808333
0.012878079898655415
Model: "model_27"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_17 (InputLayer) (None, 100, 20) 0
__________________________________________________________________________________________________
lstm_11 (LSTM) (None, 100, 90) 39960 input_17[0][0]
__________________________________________________________________________________________________
input_18 (InputLayer) (None, 10, 20) 0
__________________________________________________________________________________________________
dropout_21 (Dropout) (None, 100, 90) 0 lstm_11[0][0]
__________________________________________________________________________________________________
flatten_18 (Flatten) (None, 200) 0 input_18[0][0]
__________________________________________________________________________________________________
lstm_12 (LSTM) (None, 90) 65160 dropout_21[0][0]
__________________________________________________________________________________________________
dense_58 (Dense) (None, 100) 20100 flatten_18[0][0]
__________________________________________________________________________________________________
concatenate_9 (Concatenate) (None, 190) 0 lstm_12[0][0]
dense_58[0][0]
__________________________________________________________________________________________________
dense_59 (Dense) (None, 100) 19100 concatenate_9[0][0]
__________________________________________________________________________________________________
dense_60 (Dense) (None, 20) 2020 dense_59[0][0]
==================================================================================================
Total params: 146,340
Trainable params: 146,340
Non-trainable params: 0
__________________________________________________________________________________________________
None
directional_goodness(model_lstm_2inputs_v, x_test_v, y_test_v)
compare_top(model_lstm_2inputs_v, x_test_v, y_test_v)
plots(model_lstm_2inputs_v, history_lstm_2inputs_v, x_test=x_test_v, x_train=x_train_v, y_test=y_test_v, y_train=y_train_v)
percentage of correctly predicted directions of returns: 0.5095559845559846 Difference between mean return and return of calculated top 5: 0.0010261924111221645
Neural Networks don“t really do a good job on predicting stock markets
Patterns are too variable to be recognised as patterns by machines
time series prediction works ok
Input Data Transformations can be optimized, so that maybe structure is recognizable
Neural Networks can be optimized
More computational power could help
One-Hot Encoded Information of Industry sectors usually improves prediction power (by a small margin)
large scale study of these comparisons would still be interesting since it seems like not all networks have learned to convergence
Learning curves of networks with OHE additional Information show weird behaviour where further investigation into network model building is promising